package com.example.fast.common.util;

import lombok.experimental.UtilityClass;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

/**
 * Word segmentation utilities.
 */
@UtilityClass
public class WordUtils {

    /**
     * Segments the given text with IK Analyzer and counts how often each token occurs.
     *
     * <p>Only tokens whose text is longer than one character are counted; shorter
     * tokens are skipped. The segmenter is created with its second constructor
     * argument ({@code useSmart}) set to {@code true}.
     *
     * @param text the raw text to segment; {@code null} or empty yields an empty map
     * @return a new, mutable map from token text to its number of occurrences; never {@code null}
     * @throws IOException if the underlying segmenter fails while reading the text
     */
    public static Map<CharSequence, Integer> parse(String text) throws IOException {
        Map<CharSequence, Integer> frequencies = new HashMap<>(32);
        // Guard first: new StringReader(null) would throw a NullPointerException,
        // and there is nothing to segment in an empty string anyway.
        if (text == null || text.isEmpty()) {
            return frequencies;
        }

        IKSegmenter segmenter = new IKSegmenter(new StringReader(text), true);
        Lexeme lexeme;
        while ((lexeme = segmenter.next()) != null) {
            // Read the token text once instead of on every map access.
            String token = lexeme.getLexemeText();
            if (token.length() > 1) {
                // Inserts 1 for a new token, otherwise adds 1 to the existing count.
                frequencies.merge(token, 1, Integer::sum);
            }
        }

        return frequencies;
    }
}